# -*- coding: utf-8 -*-

import urllib.parse
import urllib.request
import requests
import sys
import re
import time
from bs4 import BeautifulSoup
from lxml import etree

'''
lxml 的whl文件下载地址：https://www.lfd.uci.edu/~gohlke/pythonlibs/

下载介质文件名：lxml‑4.1.1‑cp36‑cp36m‑win_amd64.whl

百度：python3 xpath text()
'''

# Retry budget that the script entry point passes to request_webpage_url.
sum_retries = 1000

def request_webpage_url(url, proxies, num_retries, retry_delay=5, timeout=None):
    """Fetch ``url`` with ``requests.get``, retrying when the request fails.

    Args:
        url: Address of the page to download.
        proxies: Proxy mapping forwarded to ``requests.get`` (may be None).
        num_retries: How many times to retry after the first failed attempt.
            Values <= 0 mean a single attempt with no retry.
        retry_delay: Seconds to sleep between attempts (default 5).
        timeout: Timeout forwarded to ``requests.get``; None keeps the
            library default of waiting indefinitely.

    Returns:
        The ``requests`` response object of the first attempt that did not
        raise, or None when every attempt failed.
    """
    total_attempts = max(num_retries, 0) + 1
    # Iterate instead of recursing so a large retry budget cannot exhaust
    # the interpreter's recursion limit.
    for attempt in range(1, total_attempts + 1):
        try:
            # NOTE(security): verify=False disables TLS certificate
            # verification. Kept to preserve existing behaviour; review
            # whether it is really required for the target site.
            return requests.get(
                url, verify=False, proxies=proxies, timeout=timeout
            )
        except requests.exceptions.RequestException:
            # Only request-level failures are retried; KeyboardInterrupt,
            # SystemExit and programming errors propagate to the caller.
            if attempt == total_attempts:
                break
            print("request请求失败，开始重试，第" + str(attempt) + "次")
            time.sleep(retry_delay)
    return None

def main():
    """Download a fixed yaozh.com page and print selected pieces of it.

    Prints the text of the first <title> element, then the number of
    <span> elements found in the table row whose header cell text is "省",
    followed by the type, raw text and whitespace-stripped text of the
    first such span.

    Exits the process with an error message when the page cannot be
    downloaded or parsed.
    """
    url = "https://db.yaozh.com/hmap/38994.html"
    html_body_response = request_webpage_url(url, None, sum_retries)
    if html_body_response is None:
        # request_webpage_url returns None once all retries are used up.
        sys.exit("Failed to download " + url)

    # The raw response bytes are handed to lxml directly; no separate
    # decode step is needed.
    root = etree.HTML(html_body_response.content)
    if root is None:
        # Defensive: lxml presumably yields None for an empty document.
        sys.exit("No parsable HTML returned from " + url)

    titles = root.xpath('//title')
    if titles:
        print(titles[0].text)
    else:
        print("No <title> element found", file=sys.stderr)

    tds = root.xpath('//tr[./th/text()="省"]/td/span')
    print(len(tds))
    if not tds:
        return

    first_span = tds[0]
    print(type(first_span))
    print(first_span.text)
    # .text is None when the span has no leading text node.
    if first_span.text is not None:
        print(first_span.text.replace(' ', '').replace("\n", ""))


if __name__ == '__main__':
    main()

